import sys
import os
import logging
import re
import json
from typing import Dict, List, Any, Optional, Tuple

# 添加项目根路径到 sys.path
sys.path.append(os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', '..'))

# 导入 MySQLUtil
from shared.utils.MySQLUtil import MySQLUtil

# 设置日志
logger = logging.getLogger(__name__)

def get_各学历_求职成功的影响因素分布(
    project_id: int,
    questionnaire_ids: List[int],
    product_code: Optional[str] = None,
    project_code: Optional[str] = None,
    region_code: Optional[str] = None,
    education: Optional[str] = None
) -> Dict[str, Any]:
    """
    各学历，求职成功的影响因素分布 - 指标计算函数
    
    ## 指标说明
    该指标用于分析不同学历层次毕业生认为求职成功的关键影响因素分布情况。
    计算该项目对应问题每个选项的选择人数占比，可按学历维度分组统计。
    
    ## Args
        project_id (int): 项目ID，用于查询项目配置信息
        questionnaire_ids (List[int]): 问卷ID集合，用于确定数据范围
        product_code (Optional[str]): 产品编码，用于路由到特定计算逻辑
        project_code (Optional[str]): 项目编码，用于路由到特定计算逻辑
        region_code (Optional[str]): 区域编码，用于路由到特定计算逻辑
        education (Optional[str]): 学历筛选条件，可选值：本科毕业生/专科毕业生/硕士研究生/博士研究生
        
    ## 示例
    ### 输入
    ```json
    {
        "project_id": 5895,
        "questionnaire_ids": [11158, 11159],
        "education": "博士研究生"
    }
    ```
    
    ### 输出
    ```json
    {
        "success": true,
        "message": "ok",
        "code": 0,
        "result": [
            {
                "group": "博士研究生",
                "data": [
                    {"key": "专业相关能力强", "val": 0.25},
                    {"key": "实践、实习经验丰富", "val": 0.18},
                    {"key": "学历层次高", "val": 0.12},
                    {"key": "掌握充分的就业信息", "val": 0.15},
                    {"key": "母校声誉高", "val": 0.08},
                    {"key": "资格证书、获奖经历多", "val": 0.07},
                    {"key": "学生干部经历", "val": 0.05},
                    {"key": "形象气质好", "val": 0.03},
                    {"key": "掌握求职技巧", "val": 0.04},
                    {"key": "人脉关系广", "val": 0.02},
                    {"key": "其他", "val": 0.01}
                ]
            },
            {
                "group": "汇总",
                "data": [...]
            }
        ]
    }
    ```
    """
    logger.info(f"开始计算指标: 各学历，求职成功的影响因素分布, 项目ID: {project_id}")
    
    try:
        db = MySQLUtil()  

        # 1. 查询项目配置信息
        project_sql = """
        SELECT client_code, item_year, dy_target_items, split_tb_paper 
        FROM client_item 
        WHERE id = %s
        """
        project_info = db.fetchone(project_sql, (project_id,))
        if not project_info:
            raise ValueError(f"未找到项目ID={project_id}的配置信息")

        client_code = project_info['client_code']
        item_year = project_info['item_year']
        split_tb_paper = project_info['split_tb_paper']
        
        logger.info(f"项目配置: client_code={client_code}, item_year={item_year}, split_tb_paper={split_tb_paper}")

        # 2. 计算 shard_tb_key
        shard_tb_key = re.sub(r'^[A-Za-z]*0*', '', client_code)
        logger.info(f"计算得到 shard_tb_key: {shard_tb_key}")

        # 3. 查询问卷信息
        questionnaire_sql = f"""
        SELECT id, dy_target 
        FROM wt_template_customer 
        WHERE id IN ({','.join(['%s'] * len(questionnaire_ids))})
        """
        questionnaires = db.fetchall(questionnaire_sql, tuple(questionnaire_ids))
        if not questionnaires:
            raise ValueError(f"未找到问卷ID集合={questionnaire_ids}的配置信息")
        
        logger.info(f"查询到问卷信息: {questionnaires}")

        # 4. 过滤特定调研对象的问卷
        valid_questionnaire_ids = [q['id'] for q in questionnaires if q['dy_target'] == 'GRADUATE_SHORT']
        if not valid_questionnaire_ids:
            raise ValueError("未找到目标调研对象的问卷ID")
            
        logger.info(f"找到有效问卷ID: {valid_questionnaire_ids}")

        # 5. 查询问题信息
        question_sql = """
        SELECT id, wt_code, wt_obj 
        FROM wt_template_question_customer 
        WHERE cd_template_id = %s AND wt_code = %s AND is_del = 0
        """
        question_info = db.fetchone(question_sql, (valid_questionnaire_ids[0], 'T00024572'))
        if not question_info:
            raise ValueError("未找到指定问题编码的问题信息")
            
        logger.info(f"找到问题信息: {question_info['id']}")

        # 6. 解析问题选项
        wt_obj = json.loads(question_info['wt_obj'])
        options = [{
            'key': item['key'],
            'val': item['val']
        } for item in wt_obj['itemList']]

        # 7. 构建动态表名
        answer_table = f"re_dy_paper_answer_{split_tb_paper}"
        student_table = f"dim_client_target_baseinfo_student_{item_year}"

        # 8. 计算逻辑
        def _calc_by_education(edu_filter=None):
            """按学历维度计算"""
            where_clause = "AND s.education = %s" if edu_filter else ""
            params = [valid_questionnaire_ids[0], question_info['id'], shard_tb_key, item_year]
            if edu_filter:
                params.append(edu_filter)

            sql = f"""
            SELECT
                SUM(t1.c1) as '专业相关能力强',
                SUM(t1.c2) as '实践、实习经验丰富',
                SUM(t1.c3) as '学历层次高',
                SUM(t1.c4) as '掌握充分的就业信息',
                SUM(t1.c5) as '母校声誉高',
                SUM(t1.c6) as '资格证书、获奖经历多',
                SUM(t1.c7) as '学生干部经历',
                SUM(t1.c8) as '形象气质好',
                SUM(t1.c9) as '掌握求职技巧',
                SUM(t1.c10) as '人脉关系广',
                SUM(t1.c11) as '其他',
                COUNT(*) as total
            FROM {answer_table} t1
            JOIN {student_table} s ON t1.target_no = s.target_no
            WHERE
                t1.cd_template_id = %s
                AND t1.wid = %s
                AND t1.ans_true = 1
                AND s.shard_tb_key = %s
                AND s.item_year = %s
                {where_clause}
            """
            result = db.fetchone(sql, tuple(params))
            if not result or result['total'] == 0:
                return None

            total = float(result['total'])
            return [{
                'key': option['val'],
                'val': float(result[option['val']] or 0) / total
            } for option in options]

        # 9. 执行计算
        result = []
        
        # 按学历分组计算
        if education:
            # 如果指定了学历，只计算该学历
            edu_data = _calc_by_education(education)
            if edu_data:
                result.append({
                    "group": education,
                    "data": edu_data
                })
        else:
            # 如果没有指定学历，计算所有学历
            education_list = ["本科毕业生", "专科毕业生", "硕士研究生", "博士研究生"]
            for edu in education_list:
                edu_data = _calc_by_education(edu)
                if edu_data:
                    result.append({
                        "group": edu,
                        "data": edu_data
                    })
        
        # 计算总体数据
        all_data = _calc_by_education()
        if all_data:
            result.append({
                "group": "汇总",
                "data": all_data
            })

        if not result:
            raise ValueError("未获取到有效数据")

        logger.info(f"指标 '各学历，求职成功的影响因素分布' 计算成功")
        return {
            "success": True,
            "message": "ok",
            "code": 0,
            "result": result
        }

    except Exception as e:
        logger.error(f"计算指标 '各学历，求职成功的影响因素分布' 时发生错误: {str(e)}", exc_info=True)
        return {
            "success": False,
            "message": f"数据获取失败: 各学历，求职成功的影响因素分布",
            "code": 500,
            "error": str(e)
        }